library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Load the data

## will not work, because the csv is format is absolutely messed up

#climate_data <- read_csv2('https://www.wien.gv.at/statistik/ogd/jahrbuch/klimaundwetter/tab_1.3.2_klimaundwetter_.csv')
read_file('https://www.wien.gv.at/statistik/ogd/jahrbuch/klimaundwetter/tab_1.3.2_klimaundwetter_.csv') %>%
    # remove title line
    str_remove('^.*\r\n') %>%
    # remove footer lines
    str_remove('Quelle(.|\n|\r)*') %>%
    # remove weird reference to 2nd footer line
    str_replace('\\s+\\(1\\)(.|\n|\r)*%', ';sun_percent') %>%
    # switch decimal character
    str_replace_all(',', '.') %>%
    # use comma as separator
    str_replace_all(';', ',') %>%
    write_file('climate_data.csv')
climate_data <- read_csv('climate_data.csv', na='-') %>%
    # translate to english
    rename(
        Year = Jahr
        , `SummerDays` = Sommertage
        , `FrostDays` = Frosttage
        , `IceDays` = Eistage
        , `HeatDays` = Hitzetage
    ) %>%
    mutate(
        IceDays = -IceDays
        , FrostDays = -FrostDays
    )
## Rows: 74 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): Jahr, Frosttage, Eistage, Sommertage, Hitzetage, Sonnenscheindauer,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
climate_data
## # A tibble: 74 × 7
##     Year FrostDays IceDays SummerDays HeatDays Sonnenscheindauer sun_percent
##    <dbl>     <dbl>   <dbl>      <dbl>    <dbl>             <dbl>       <dbl>
##  1  1950       -62     -31         65        8              1959        45.8
##  2  1951       -51     -10         51        1              1898        44.4
##  3  1952       -93     -26         46       13              1847        43.2
##  4  1953       -79     -20         59        7              2140        50.0
##  5  1954       -79     -40         41        7              1784        41.7
##  6  1955       -91     -26         25        1              1710        40.0
##  7  1956      -102     -47         34        3              1937        45.3
##  8  1957       -56     -32         41       10              1822        42.6
##  9  1958       -77     -12         38        9              1767        41.3
## 10  1959       -63     -16         36        5              1964        45.9
## # ℹ 64 more rows

Index days

Visualize the data

Area Chart

fig <- plot_ly(
    climate_data
    , x = ~Year
    , y = ~SummerDays
    , name = 'Summer Days'
    , type = 'scatter'
    , mode = 'none'
    , stackgroup = 'positive'
    , fillcolor = 'orange'
    )

fig <- fig %>% add_trace(y = ~HeatDays, name = 'Heat Days', fillcolor = 'red')
fig <- fig %>% add_trace(y = ~FrostDays, name = 'Frost Days', fillcolor = 'lightblue', stackgroup = "negative")
fig <- fig %>% add_trace(y = ~IceDays, name = 'Ice Days', fillcolor = 'blue', stackgroup = "negative")

fig <- fig %>% layout(
    title = 'Climatological Index Days in Vienna, Austria, 1950-2023'
    , xaxis = list(title = 'Year')
    , yaxis = list(title = 'Number of Days')
    , legend = list(x = 0.05, y = 0.95)
    , hovermode = "x unified"
    )

fig 
## Warning: Ignoring 1 observations
api_create(fig, filename = 'climate_index_days_vienna', fileopt='overwrite')
## Warning: Ignoring 1 observations
## Found a grid already named: 'climate_index_days_vienna Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'climate_index_days_vienna'. Since fileopt='overwrite', I'll try to update it

Regression

fig <- climate_data %>%
    select(Year:HeatDays) %>%
    pivot_longer(cols = -Year, names_to = 'Index', values_to = 'Days') %>%
    mutate(
        Days = abs(Days)
        , Index = recode(Index
            , `SummerDays` = 'Summer Days'
            , `FrostDays` = 'Frost Days'
            , `IceDays` = 'Ice Days'
            , `HeatDays` = 'Heat Days'
            )
        , Index = factor(Index, levels = c('Heat Days', 'Summer Days', 'Frost Days', 'Ice Days'))
        ) %>%
    ggplot(aes(x = Year, y = Days, color = Index)) +
    geom_point(alpha=0.5) +
    theme_minimal() +
    geom_smooth(method = 'lm', se = FALSE) +
    scale_color_manual(values = c('red', 'orange', 'lightblue', 'blue')) +
    labs(
        title = 'Climatological Index Days in Vienna, Austria, 1950-2023'
        , x = 'Year'
        , y = 'Number of Days'
        , color = 'Index'
    ) 
    
fig <- fig %>% ggplotly() 
## `geom_smooth()` using formula = 'y ~ x'
fig <- fig %>% layout(
    legend = list(x = 0.45, y = 1)
    )

fig
api_create(fig, filename = 'climate_index_days_vienna_regression', fileopt='overwrite')
## Found a grid already named: 'climate_index_days_vienna_regression Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'climate_index_days_vienna_regression'. Since fileopt='overwrite', I'll try to update it

get regression coefficients

climate_data %>%
    select(Year:HeatDays) %>%
    pivot_longer(cols = -Year, names_to = 'Index', values_to = 'Days') %>%
    mutate(
        Days = abs(Days)
        , Index = recode(Index
            , `SummerDays` = 'Summer Days'
            , `FrostDays` = 'Frost Days'
            , `IceDays` = 'Ice Days'
            , `HeatDays` = 'Heat Days'
            )
        , Index = factor(Index, levels = c('Heat Days', 'Summer Days', 'Frost Days', 'Ice Days'))
        ) %>%
    group_by(Index) %>%
    summarise(
        Intercept = lm(Days ~ Year)$coefficients[1]
        , Slope = lm(Days ~ Year)$coefficients[2]
        , p_value = summary(lm(Days ~ Year))$coefficients[2, 4]
    ) 
## # A tibble: 4 × 4
##   Index       Intercept  Slope  p_value
##   <fct>           <dbl>  <dbl>    <dbl>
## 1 Heat Days       -619.  0.319 6.48e-11
## 2 Summer Days    -1376.  0.721 1.01e-15
## 3 Frost Days       871. -0.404 1.44e- 5
## 4 Ice Days         508. -0.245 8.40e- 5